Purpose: Retrieve full list of UK stations from http://orr.gov.uk/statistics/published-stats/station-usage-estimates

Create tables in the database and add data


In [1]:
import pandas as pd
pd.options.display.max_columns = 999

In [2]:
stations_df = pd.read_excel("http://orr.gov.uk/__data/assets/excel_doc/0019/20179/Estimates-of-Station-Usage-in-2014-15.xlsx",2)

In [3]:
stations_df.head()


Out[3]:
NLC TLC Station Name Government Office Region (GOR) County or Unitary Authority District or Unitary Authority NUTS2 Spatial_Unit Code NUTS2 Spatial Unit Station Facility Owner Station Group PTE Urban Area Station London Travelcard Area SRS Code SRS Description NR Route CRP Line Designation OS Grid Northing OS Grid Easting Entries_Full Entries_Reduced Entries_Season Entries_Total Exits_Full Exits_Reduced Exits_Season Exits_Total 1415 Entries & Exits 1314 Entries & Exits 1415 Interchanges Large station Flag Small Station Flag Explanation of large change Sources
0 5131 ABW Abbey Wood London Greater London Greenwich UKI2 Outer London Southeastern NaN NaN London Travelcard Area Station A.07 Dartford Lines to Gravesend and Hayes Branch South East NaN 179035 547310 332510 435776 891418 1659704 332510 435776 891418 1659704 3319408 3282240 0.0000 0 0 NaN NaN
1 3813 ABE Aber Wales - Cymru Caerphilly - Caerffili Caerphilly - Caerffili UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN L.18 South Wales Valleys Wales NaN 187030 314910 46815 3371 56087 106273 46815 3371 56087 106273 212546 219868 0.0000 0 0 NaN NaN
2 3801 ACY Abercynon Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN L.18 South Wales Valleys Wales NaN 194590 307945 73186 6716 52827 132729 73186 6716 52827 132729 265458 251688 40847.1439 0 0 NaN NaN
3 3982 ABA Aberdare Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN I.99 Other Freight Lines Wales NaN 203300 300100 160625 19037 96302 275964 160625 19037 96302 275964 551928 557992 0.0000 0 0 NaN NaN
4 8976 ABD Aberdeen Scotland Aberdeen City Aberdeen City UKM1 North Eastern Scotland ScotRail NaN NaN NaN P.09 Dundee - Aberdeen Scotland NaN 805890 394095 654309 891364 325650 1871323 654309 891364 325650 1871323 3742646 3600268 219002.4081 0 0 NaN NaN

In [4]:
headers = [h.lower() for h in list(stations_df.columns)]
headers = [h.replace(" ", "_").replace("(", "").replace(")","") for h in headers]
stations_df.columns = headers
stations_df.head()
stations_df["london_or_gb"] = "gb"
stations_df.loc[stations_df["county_or_unitary_authority"] == "Greater London","london_or_gb"] = "london"
stations_df.head()


Out[4]:
nlc tlc station_name government_office_region_gor county_or_unitary_authority district_or_unitary_authority nuts2_spatial_unit_code nuts2_spatial_unit station_facility_owner station_group pte_urban_area_station london_travelcard_area srs_code srs_description nr_route crp_line_designation os_grid_northing os_grid_easting entries_full entries_reduced entries_season entries_total exits_full exits_reduced exits_season exits_total 1415_entries_&_exits 1314_entries_&_exits 1415_interchanges large_station_flag small_station_flag explanation_of_large_change sources london_or_gb
0 5131 ABW Abbey Wood London Greater London Greenwich UKI2 Outer London Southeastern NaN NaN London Travelcard Area Station A.07 Dartford Lines to Gravesend and Hayes Branch South East NaN 179035 547310 332510 435776 891418 1659704 332510 435776 891418 1659704 3319408 3282240 0.0000 0 0 NaN NaN london
1 3813 ABE Aber Wales - Cymru Caerphilly - Caerffili Caerphilly - Caerffili UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN L.18 South Wales Valleys Wales NaN 187030 314910 46815 3371 56087 106273 46815 3371 56087 106273 212546 219868 0.0000 0 0 NaN NaN gb
2 3801 ACY Abercynon Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN L.18 South Wales Valleys Wales NaN 194590 307945 73186 6716 52827 132729 73186 6716 52827 132729 265458 251688 40847.1439 0 0 NaN NaN gb
3 3982 ABA Aberdare Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales NaN NaN NaN I.99 Other Freight Lines Wales NaN 203300 300100 160625 19037 96302 275964 160625 19037 96302 275964 551928 557992 0.0000 0 0 NaN NaN gb
4 8976 ABD Aberdeen Scotland Aberdeen City Aberdeen City UKM1 North Eastern Scotland ScotRail NaN NaN NaN P.09 Dundee - Aberdeen Scotland NaN 805890 394095 654309 891364 325650 1871323 654309 891364 325650 1871323 3742646 3600268 219002.4081 0 0 NaN NaN gb

In [5]:
# Now write out to postgres
from mylibrary.connections import conn, engine, cursor
stations_df.to_sql("all_stations", engine, schema="tt_gh", if_exists="replace", index=False)


/Users/robinlinacre/anaconda/lib/python2.7/site-packages/sqlalchemy/dialects/postgresql/base.py:2505: SAWarning: Did not recognize type 'geometry' of column 'geom'
  (attype, name))

In [6]:
# Create geometry column for the points including a spatial index for efficient querying

sql = """
SELECT AddGeometryColumn ('tt_gh', 'all_stations', 'geom', 27700, 'POINT', 2);
UPDATE tt_gh.all_stations SET geom = ST_GeomFromText('POINT(' || os_grid_easting || ' ' || os_grid_northing || ')', 27700 );
CREATE INDEX idx_geom_all_stations_points ON tt_gh.all_stations USING gist(geom);
"""
cursor.execute(sql)
conn.commit()

In [7]:
#Make a lat and lng column

sql = """

ALTER TABLE tt_gh.all_stations ADD lat float, ADD lng float, ADD icscode text, 
ADD icscode_status text, ADD tfl_request text, ADD tfl_response json, ADD tfl_message  text;
UPDATE tt_gh.all_stations SET
    lng = ST_X(ST_TRANSFORM(geom, 4326)),
    lat = ST_Y(ST_TRANSFORM(geom,4326));
ALTER TABLE tt_gh.all_stations  ADD PRIMARY KEY (nlc);
""" 
cursor.execute(sql)
conn.commit()

In [8]:
sql = """
select * from tt_gh.all_stations limit 5
"""
pd.read_sql(sql, conn)


Out[8]:
nlc tlc station_name government_office_region_gor county_or_unitary_authority district_or_unitary_authority nuts2_spatial_unit_code nuts2_spatial_unit station_facility_owner station_group pte_urban_area_station london_travelcard_area srs_code srs_description nr_route crp_line_designation os_grid_northing os_grid_easting entries_full entries_reduced entries_season entries_total exits_full exits_reduced exits_season exits_total 1415_entries_&_exits 1314_entries_&_exits 1415_interchanges large_station_flag small_station_flag explanation_of_large_change sources london_or_gb geom lat lng icscode icscode_status tfl_request tfl_response tfl_message
0 5131 ABW Abbey Wood London Greater London Greenwich UKI2 Outer London Southeastern None None London Travelcard Area Station A.07 Dartford Lines to Gravesend and Hayes Branch South East None 179035 547310 332510 435776 891418 1659704 332510 435776 891418 1659704 3319408 3282240 0.0000 0 0 None None london 0101000020346C000000000000DCB3204100000000D8DA... 51.491077 0.120473 None None None None None
1 3813 ABE Aber Wales - Cymru Caerphilly - Caerffili Caerphilly - Caerffili UKL1 West Wales and the Valleys Arriva Trains Wales None None None L.18 South Wales Valleys Wales None 187030 314910 46815 3371 56087 106273 46815 3371 56087 106273 212546 219868 0.0000 0 0 None None gb 0101000020346C0000000000007838134100000000B0D4... 51.575689 -3.229271 None None None None None
2 3801 ACY Abercynon Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales None None None L.18 South Wales Valleys Wales None 194590 307945 73186 6716 52827 132729 73186 6716 52827 132729 265458 251688 40847.1439 0 0 None None gb 0101000020346C000000000000A4CB124100000000F0C0... 51.642549 -3.331737 None None None None None
3 3982 ABA Aberdare Wales - Cymru Rhondda Cynon Taff - Rhondda Cynon Taf Rhondda Cynon Taff - Rhondda Cynon Taf UKL1 West Wales and the Valleys Arriva Trains Wales None None None I.99 Other Freight Lines Wales None 203300 300100 160625 19037 96302 275964 160625 19037 96302 275964 551928 557992 0.0000 0 0 None None gb 0101000020346C000000000000105112410000000020D1... 51.719497 -3.447568 None None None None None
4 8976 ABD Aberdeen Scotland Aberdeen City Aberdeen City UKM1 North Eastern Scotland ScotRail None None None P.09 Dundee - Aberdeen Scotland None 805890 394095 654309 891364 325650 1871323 654309 891364 325650 1871323 3742646 3600268 219002.4081 0 0 None None gb 0101000020346C000000000000BC0D1841000000000498... 57.143864 -2.099214 None None None None None

This was tested in QGIS

<img src="pics/london_stations.png", style="width:200px;">


In [9]:
sql = """
select count(*), london_or_gb  from tt_gh.all_stations group by london_or_gb
"""
pd.read_sql(sql, conn)


Out[9]:
count london_or_gb
0 329 london
1 2210 gb